/* This program creates the baseline dataset for Hoen, Brunner, and Schwegman, "An analysis of home price premiums in school districts with high levels of wind energy development." 
The program merges in multiple dataset from the NCES Common Core including fiscal data from 2005-2021 from the F33 files, pupil-teacher ratios from the common core, census data from the special tabluation of the 2000 census school district files, and data from the LBNL wind turbine database. */

log using dataset, replace

global Data "R:\CLAS_Brunner\District_Capitalization\Data"

* Bring in Raw NCES F33 Fiscal Data
use "$Data\nces_fiscal"
drop if year<2005

* Merge with nonfiscal data 

merge 1:1 ncesid year using "$Data\ptratio"
drop if _merge==2
drop _merge

merge m:1 ncesid using "$Data\nces_vars"
drop if _merge==2
drop _merge


* Fill in missing ccode
destring ccode, replace
egen temp=max(ccode), by(ncesid)
replace ccode=temp if ccode==.
drop temp

* Drop nonstandard districts
drop if ncesid==3901374
drop if ncesid==3901535
drop if ncesid==3901490
drop if ncesid==3901512



************************************************
nsplit ccode, digits(2 3) gen(fipst1 fips_county)
*************************************************

* Merge in County Poverty Data
replace fips_county=25 if ncesid==1200390
replace fips_county=187 if ncesid==1900028
replace fips_county=186 if ncesid==2929400
replace fips_county=186 if ncesid==2929370 
replace fips_county=231 if fipst==2 & fips_county==105
replace fips_county=231 if fipst==2 & fips_county==232
replace fips_county=231 if fipst==2 & fips_county==230
replace fips_county=231 if fipst==2 & fips_county==282
replace fips_county=261 if fipst==2 & fips_county==260
replace fips_county=261 if fipst==2 & fips_county==80
replace fips_county=13 if fipst==2 & fips_county==10
replace fips_county=185 if fipst==2 & fips_county==40

merge m:1 fipst fips_county using "$Data\county_income"
drop if _merge==2
drop _merge

label var cmedhhinc "County Median HH Inc 2000"
label var cpov "County Poverty Rate 2000"

nsplit ncesid, digits(2 5) gen(temp ncesid1)
drop temp*

replace denrl=1838 if ncesid==631170 & year==2012

* Create Outcomes
for x in any trev frev srev srev_cap lrev plcont ptax_rev stax_rev olcont ///
texp cexp cexp_inst v91 v92 cexp_ss cexp_ssp cexp_ssis cexp_ssga cexp_sssa ///
cexp_ssom cexp_ssst cexp_ssbco cexp_ssns cexp_o tnelsc_exp tcapital ///
construction capland capie capoe capne l12 m12 q11 i86 tsalary sal_inst ///
tbenefits ben_inst ltdebt_bfy ltdebt_issued ltdebt_retired ltdebt_efy ///
std_bfy: replace x=. if x<0


**************************************************************************


* Merge in 2000 District characteristics for pre-determined controls
merge m:1 ncesid using "$Data\district_census_00"
drop if _merge==2
drop _merge

merge m:1 ncesid using "$Data\nces_locale"
drop if _merge==2
drop _merge


merge m:1 stfips using "$Data\regions"
drop if _merge==2
drop _merge


* Merge in 2004-05 baseline district characteristics from NCES
merge m:1 ncesid using "$Data\nces_nfiscal_05"
drop if _merge==2
drop _merge

* Merge in County leve data on Population density and demographics
merge m:1 fipst fips_county using "$Data\census_county00"
drop if _merge==2
drop _merge

gen pop_dens00= tpop_00/cland_area

label var pop_dens00 "County Population Density 2000"
replace white_00 = cpwhite_00 if white_00==.
replace hisp_00 = cphisp_00 if hisp_00==.
replace black_00=cpblack_00 if black_00==.
replace pop65_00 = cpage65_00 if pop65_00==.
replace pop55_00 = cpage55_00 if pop55_00==.

gen pwhite_05=white_05/tstudents_05
gen pblack_05=black_05/tstudents_05
gen phisp_05=hisp_05/tstudents_05
gen plep_05=lep_05/tstudents_05
gen pnwhite_05=1-pwhite_05

drop white_05 black_05 hisp_05 lep_05
label var pwhite_05 "Share students white Non-Hispanic 2005"
label var pblack_05 "Share students Black 2005"
label var phisp_05 "Share students Hispanic 2005"
label var plep_05 "Share students limited English 2005"
label var pnwhite_05 "Share students non-white 2005"



replace ucode10=ucode15 if ucode10==""
replace local00=local05 if local00==""
replace local00=local95 if local00==""


label var pop55_00 "share pop 55 or older 2000"
label var pop65_00 "Share pop 65 or older 2000"
label var white_00 "Share pop white non-Hispanic 2000"
label var hisp_00 "Share pop Hispanic 2000"
label var black_00 "Share pop black 2000"
label var pcoll_00 "Share pop college degree or higher 2000"
label var pown_00 "Share homeowner 2000"
label var cpov_child "County Percent pop under 17 in poverty 2000" 
label var cname "County name"
label var ccode "County FIPS code"
label var ucode10 "Urban Code Census"
label var local00 "Locale Code"
label var tstudents_05 "Total Students NCES 2005"
label var region "Census regions"
label var region1 "Expanded Census regions"

replace medhhinc=cmedhhinc if medhhinc==.

* Generate Control Variables
sort ncesid year
qui by ncesid: gen obs=_n
gen temp=denrl if obs==1
egen enrl_fy=max(temp), by(ncesid)
label var enrl_fy "enrollment in base year"
drop temp obs

gen elem=0
replace elem=1 if schlev=="01"


gen rural1=0
replace rural1=1 if ucode10=="31-Town: Fringe"
replace rural1=1 if ucode10=="32-Town: Distant"
replace rural1=1 if ucode10=="33-Town: Remote"
replace rural1=1 if ucode10=="41-Rural: Fringe"
replace rural1=1 if ucode10=="42-Rural: Distant"
replace rural1=1 if ucode10=="43-Rural: Remote"

*********************
	
* Update county codes

merge m:1 ncesid using "$Data\ncesid_county"
drop if _merge==2
drop _merge

replace ccode=ccode1 if ccode==. & ccode1~=.


* Merge in Commuting Zones
merge m:1 ccode using "$Data\cw_cty_czone"
drop if _merge==2
drop _merge
egen stid=group(fipst)

save "$Data\temp", replace

************************************************

/* Bring in 2021 data so we have as much housing data as possible. To do that we 
append a dataset containing all the time invariant variables we just created so 
everything other than the fiscal outcomes */

sort ncesid
keep  ncesid name schlev agchrt denrl fipst1 fips_county cpov cpov_child cmedhhinc ///
cname ncesid1 pcoll_00 pown_00 medhhinc_00 pop55_00 pop65_00 white_00 hisp_00 black_00 tpop18_00 ///
phs plhs_00 pscol unemp_00 ucode10 local00 rural region region1 tstudents_05 locale_05 dtype_05 county_fips ///
cland_area tpop_00 cphisp_00 cpwhite_00 cpblack_00 cpage65_00 cpage55_00 pop_dens00 pwhite_05 pblack_05 phisp_05 ///
plep_05 pnwhite_05 enrl_fy elem rural1 stid  ccode fipst ccode1 czone cty_pop cty_labf

qui by ncesid: gen obs=_n
drop if obs>1
drop obs
gen year=2021
save "$Data\data_2021", replace
clear

use "$Data\temp"
append using "$Data\data_2021"


* Merge in actual enrollment and pupil teacher ratio in 2021

merge m:1 ncesid year using "$Data\nces_vars1"
drop if _merge==2
drop _merge

replace denrl=tstudent if year==2021
replace ptratio=ptratio1 if year==2021


*************************************************
gen cpi=0

replace cpi=152.4 if year==1995
replace cpi=156.9 if year==1996
replace cpi=160.5 if year==1997
replace cpi=163.0 if year==1998
replace cpi=166.6 if year==1999
replace cpi=172.2 if year==2000
replace cpi=177.1 if year==2001
replace cpi=179.9 if year==2002
replace cpi=184.0 if year==2003
replace cpi=188.9 if year==2004
replace cpi=195.3 if year==2005
replace cpi=201.6 if year==2006
replace cpi=207.3 if year==2007
replace cpi=215.3 if year==2008
replace cpi=214.5 if year==2009
replace cpi=218.1 if year==2010
replace cpi=224.9 if year==2011
replace cpi=229.6 if year==2012
replace cpi=232.957 if year==2013
replace cpi=236.736 if year==2014
replace cpi=237.017 if year==2015
replace cpi=240.008 if year==2016
replace cpi=245.100 if year==2017
replace cpi=251.100 if year==2018
replace cpi=255.700 if year==2019
replace cpi=258.800 if year==2020
replace cpi=271.0 if year==2021


gen deflator=271.00/cpi
label var cpi "Consumer Price Index"
label var deflator "2021 price deflator factor"

for x in any trev frev srev srev_cap lrev plcont ptax_rev stax_rev olcont ///
texp cexp cexp_inst v91 v92 cexp_ss cexp_ssp cexp_ssis cexp_ssga cexp_sssa ///
cexp_ssom cexp_ssst cexp_ssbco cexp_ssns cexp_o tnelsc_exp tcapital ///
construction capland capie capoe capne l12 m12 q11 i86 tsalary sal_inst ///
tbenefits ben_inst ltdebt_bfy ltdebt_issued ltdebt_retired ltdebt_efy ///
std_bfy: gen rx_pp=(x/denrl)*deflator


********************************************************************

* Impose sample restrictions. 

for x in any rplcont_pp rptax_rev_pp rstax_rev_pp: replace x=. if x<0

* Replace 0 values to missing

for x in any rtrev_pp rlrev_pp rtexp_pp rcexp_pp rcexp_inst_pp rtsalary_pp rsal_inst_pp: replace x=. if x==0


replace denrl=. if denrl<=0

* Follow Sample Restrictions of LRS (2018)
egen mean_enrl=mean(denrl), by(ncesid)
drop if denrl>2*mean_enrl

/* Drop district/year observations where revenue or expenditures are five time higher
or lower than district average. */

replace rtrev_pp=. if rtrev_pp>45000
replace rtrev_pp=. if rtrev_pp<6000

for x in any rtexp_pp rcexp_pp rlrev_pp rtcapital_pp: replace x=. if rtrev_pp==.
 
replace rtcapital_pp=. if rtcapital_pp>100000
replace rtexp_pp=. if rtcapital_pp>100000
replace ptratio=. if ptratio>40
**********************************************************************************

label var rtrev_pp "Real Total Revenue Per-Pupil"
label var rfrev_pp "Real Federal Revenue Per-Pupil"
label var rsrev_pp "Real State Revenue Per-Pupil"
label var rlrev_pp "Real Local Revenue Per-Pupil"
label var rplcont_pp "Local Rev. Parent Gov Contribution, T02"
label var rolcont_pp "local Rev. City or County Contribution, D23"
label var rptax_rev_pp "Real Local Property Tax Revenue Per-Pupil"
label var rstax_rev_pp "Local Sales Tax Revenue, T09"
label var rtexp_pp "Real Total Expenditures Per-Pupil"
label var rcexp_pp "Real Current Expenditures Per-Pupil"
label var rcexp_inst_pp "Real Current Expenditures for Instruction Per-Pupil"
label var rtcapital_pp "Real Total Capital Expenditures Per-Pupil"
label var rconstruction_pp "Construction Exp., F12"
label var rltdebt_bfy_pp "Long-Term Debt BFY, A_19H"
label var rltdebt_issued_pp "Long-Term Debt Issued, A_21F"
label var rltdebt_retired_pp "Long-Term Debt Retired, A_31F"
label var rltdebt_efy "Long-Term Debt EFY, A_41F"
label var rsrev_cap_pp "State Aid for Capital, C11"  
label var rcapie_pp "Capital Instructional Equipment, K09"
label var rcapoe_pp "Capital Other Equipment, K10"
label var rcapne_pp "Capital nonspecified Equipment, K11"
label var rcapland_pp "Capital Land and Existing Structures, G15"
label var rcexp_ss_pp "Current Exp. Support Services, tcurssvc"
label var rcexp_ssp_pp "Current Exp. Support Services Pupils, E17"
label var rcexp_ssis_pp "Current Exp. S.S. Instructional Staff, E07"
label var rcexp_ssga_pp "Current Exp. S.S. General Admin, E08"
label var rcexp_sssa_pp "Current Exp. S.S. School Admin, E09"
label var rcexp_ssbco_pp "Current Exp. S.S. Business, Central Other, V90"
label var rcexp_ssom_pp "Current Exp. S.S. Operations and Maintenance, V40"
label var rcexp_ssst_pp "Current Exp. S.S. Student Transporation, V45"
label var rcexp_ssns_pp "Current Exp. S.S. Not Specified, V85"
label var rcexp_o_pp "Current Exp. Other Elem Secondary, tcuroth"
label var rtsalary_pp "Total Salaries, Z32"
label var rsal_inst_pp "Salaries Instruction, Z33"
label var rtbenefits_pp "Total Benefits, Z34"
label var rben_inst_pp "Benefits Instruction, V10"
label var rq11 "PAYMENTS TO OTHER SCHOOL SYSTEMS"
label var rl12 "PAYMENTS TO STATE GOVERNMENTS"
label var rm12 "PAYMENTS TO LOCAL GOVERNMENTS"
label var ri86 "INTEREST ON DEBT"
label var rv91 "PAYMENTS TO PRIVATE SCHOOLS"
label var rv92 "PAYMENTS TO CHARTER SCHOOLS"
label var rstd_bfy_pp "Short Term Debt, BFY  A_61V"
label var rtnelsc_exp_pp "TNELSC Exp"
label var ptratio "Pupil Teacher Ratio"


drop censusid srev_cap ptax_rev stax_rev olcont v91 v92 cexp_ss cexp_ssp cexp_ssis cexp_ssga cexp_sssa cexp_ssom ///
cexp_ssst cexp_ssbco cexp_ssns cexp_o tnelsc_exp capland capie capoe capne l12 m12 q11 i86 tsalary sal_inst ///
tbenefits ben_inst mrtrev_pp mrtexp_pp mean_enrl mrcexp_pp ucode15 local05 local95 statename year1 ///
rltdebt* ltdebt* rplcont_pp  plcont std_bfy  rstd_bfy_pp 

***********************************************************************************
* Merge in wind turbine data

merge 1:1 ncesid year using "$Data\treated_district_April13"
drop if _merge==2
drop _merge
drop if min_p_year<2005 & min_p_year~=.


* Create Per Pupil Total Capacity Variable

	replace cum_p_cap=0 if cum_p_cap==.
	replace cum_p_cap=0 if year<min_p_year

	
	gen tcap_pp=cum_p_cap/denrl
	lab var cum_p_cap "Total Capacity (kilowatts)"
	lab var tcap_pp "Total Capacity (kilowatts) Per-Pupil"
	
	
	* Find Enrollment at First Turbine Installation
	sort ncesid year
	by ncesid: gen obs=_n if cum_p_cap>0
	egen mobs=min(obs), by(ncesid)
	gen temp2=denrl if obs==mobs
	egen denrl_min=max(temp2), by(ncesid)
	
	
	* Create Capacity per Pupil using baseline enrollment;
	gen mcap_pp=cum_p_cap/denrl_min
	replace mcap=0 if mcap==.
	drop temp*
	label var mcap_pp "Total Capacity Per Pupil Base Enrollment"
	
/* According to the EWEA, the average size of onshore turbines being 
manufactured today is around 2.5-3 MW. Thus, to be conservative, start 
by dropping districts with a max t_cap of less than 2,000 KW or 2 MW. */

	egen temp=max(cum_p_cap) if cum_p_cap>0, by(ncesid)
	egen mcap1=max(temp), by(ncesid)
	replace mcap1=0 if mcap1==.
	drop if mcap1<2000 & mcap1~=0
	drop temp* obs*
	

* Drop nontraditional and charter schools

drop if dtype==3
drop if dtype==4
drop if dtype==5
drop if dtype==6
drop if dtype==7
drop if dtype==8
drop if dtype==9


* Keep Unfied and Elmentary Districts
drop if schlev=="02"
drop if schlev=="04"
drop if schlev=="05"
drop if schlev=="06"
drop if schlev=="07"
drop if schlev=="N"

	
* Drop other variables we do not use
drop trev frev srev lrev texp cexp cexp_inst tcapital construction sname cpov_child rv91_pp rv92_pp ///
rcexp_ss_pp rcexp_ssp_pp rcexp_ssis_pp rcexp_ssga_pp rcexp_sssa_pp rcexp_ssom_pp rcexp_ssst_pp ///
rcexp_ssbco_pp rcexp_ssns_pp rcexp_o_pp rtnelsc_exp_pp rcapland_pp rcapie_pp rcapoe_pp rcapne_pp ///
rl12_pp rm12_pp rq11_pp ri86_pp rtsalary_pp rsal_inst_pp rtbenefits_pp rben_inst_pp agchrt
	
*********************************
compress
save "$Data\ccd_data_wind_aug23", replace

